{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7fede23e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd                                       # DataFrames and plotting\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt                           # plotting\n",
    "from matplotlib.colors import ListedColormap              # custom color maps\n",
    "import matplotlib.ticker as mtick\n",
    "from matplotlib.patches import Rectangle\n",
    "import matplotlib as mpl\n",
    "from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
    "from numpy.linalg import eig                              # Eigen values and Eigen vectors\n",
    "from sklearn.decomposition import PCA                     # PCA program from scikit learn (package for machine learning)\n",
    "from sklearn.preprocessing import StandardScaler          # normalize synthetic data\n",
    "from ipywidgets import interactive                        # widgets and interactivity\n",
    "from ipywidgets import widgets                            \n",
    "from ipywidgets import Layout\n",
    "from ipywidgets import Label\n",
    "from ipywidgets import VBox, HBox\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd001700",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12effd17",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70e4f054",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80e526c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "m = 4\n",
    "mean = np.zeros((m))\n",
    "#cov = np.zeros((m,m))\n",
    "cov = np.full((m,m),0.8)\n",
    "for i in range(0,m):\n",
    "    cov[i,i] = 1.0\n",
    "cov[2,3] = cov[3,2] = 0.2; cov[1,3] = cov[3,1] = -0.2; cov[2,0] = cov[0,2] = 0.4; cov[1,0] = cov[0,1] = -0.5\n",
    "\n",
    "data = np.random.multivariate_normal(mean = mean, cov = cov, size = 1000)\n",
    "data = StandardScaler(copy=True, with_mean=True, with_std=True).fit(data).transform(data)\n",
    "\n",
    "df = pd.DataFrame(data, columns=np.array(['' + str(i) for i in range(0, m)]))\n",
    "\n",
    "plt.subplot(121)                                        # plot correlation matrix with significance colormap\n",
    "sns.heatmap(cov,vmin = -1.0, vmax = 1.0,linewidths=.5, fmt= '.1f',cmap = plt.cm.Spectral_r)\n",
    "plt.title('Target Covariance Matrix')\n",
    "\n",
    "plt.subplot(122)\n",
    "sns.heatmap(df.iloc[:,:].corr(),vmin = -1.0, vmax = 1.0,linewidths=.5, fmt= '.1f',cmap = plt.cm.Spectral_r)\n",
    "plt.title('Actual Covariance Matrix')\n",
    "\n",
    "plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.1, wspace=0.2, hspace=0.2); plt.show()\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26aa283f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[:,:].corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea5b2515",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83b1e382",
   "metadata": {},
   "outputs": [],
   "source": [
    "nsample = 100\n",
    "dpalette = sns.color_palette(\"rocket_r\",n_colors = 3)   # matrix scatter plot with points and density estimator\n",
    "palette = sns.color_palette(\"rocket\")\n",
    "matrixplot = sns.pairplot(df.sample(n=nsample),diag_kind = 'kde',palette = dpalette,diag_kws={'edgecolor':'black'},plot_kws=dict(s=50, edgecolor=\"black\", linewidth=0.5,alpha=0.2))\n",
    "matrixplot.map_lower(sns.kdeplot, levels=3, color=\"black\")\n",
    "plt.subplots_adjust(left=0.0, bottom=0.0, right=0.5, top=0.6, wspace=0.2, hspace=0.3); plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58970b63",
   "metadata": {},
   "outputs": [],
   "source": [
    "cov_actual = np.cov(data,rowvar = False)\n",
    "eigen_values,eigen_vectors = eig(cov_actual)\n",
    "sorted_indices = np.argsort(-eigen_values)\n",
    "sorted_eigen_vectors = eigen_vectors[:, sorted_indices]\n",
    "sorted_eigen_values = np.sort(-eigen_values)*-1\n",
    "\n",
    "plt.subplot(121)\n",
    "plt.plot(np.arange(1,5,1),np.cumsum(sorted_eigen_values)/np.sum(sorted_eigen_values)*100,color='darkorange',alpha=0.8)\n",
    "plt.scatter(np.arange(1,5,1),np.cumsum(sorted_eigen_values)/np.sum(sorted_eigen_values)*100,color='darkorange',alpha=0.8,edgecolor='black')\n",
    "plt.plot([1,4],[95,95], color='black',linestyle='dashed')\n",
    "plt.xlabel('Principal Component'); plt.ylabel('Cumulative Variance Explained'); plt.title('Cumulative Variance Explained by Principal Component')\n",
    "fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'\n",
    "yticks = mtick.FormatStrFormatter(fmt); \n",
    "plt.xlim(1,4); plt.ylim(0,100.0); plt.annotate('95% variance explained',[3.0,90])\n",
    "plt.gca().yaxis.set_major_formatter(yticks)\n",
    "\n",
    "plt.subplot(122)\n",
    "im = plt.imshow(sorted_eigen_vectors,cmap = plt.cm.Spectral_r)\n",
    "plt.title('Actual Covariance Matrix')\n",
    "cbar = plt.colorbar(\n",
    "    im, orientation=\"vertical\", ticks=np.linspace(-1, 1, 10)\n",
    ")\n",
    "cbar.set_label('Component Loadings', rotation=270, labelpad=20)\n",
    "plt.xlim([-0.5,3.5]); plt.ylim([-0.5,3.5])\n",
    "plt.gca().set_xticks([0,1, 2, 3],[1,2,3,4]); plt.gca().set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "for x in np.arange(0.5,4.5,1.0):\n",
    "    plt.plot([x,x],[-0.5,3.5],c='black',lw=3)\n",
    "    plt.plot([-0.5,3.5],[x,x],c='black',lw=1,ls='--')\n",
    "\n",
    "plt.subplots_adjust(left=0.0, bottom=0.0, right=2.01, top=0.9, wspace=0.2, hspace=0.3); plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5142ac6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(6, 6))\n",
    "gs = fig.add_gridspec(2,2 ,width_ratios=(1.0, 1.0))\n",
    "\n",
    "plt_center = fig.add_subplot(gs[0, 1])\n",
    "plt_x = fig.add_subplot(gs[0, 0],sharey=plt_center) \n",
    "plt_y = fig.add_subplot(gs[1, 1],sharex=plt_center) \n",
    "plt_extra = fig.add_subplot(gs[1, 0]) \n",
    "\n",
    "# im = plt_center.imshow(sorted_eigen_vectors,cmap = plt.cm.Spectral_r)\n",
    "# plt_center.set_title('Actual Covariance Matrix')\n",
    "# cbar = plt.colorbar(\n",
    "#     im, orientation=\"vertical\", ticks=np.linspace(-1, 1, 10)\n",
    "# )\n",
    "# cbar.set_label('Component Loadings', rotation=270, labelpad=20)\n",
    "\n",
    "for i in range(0,m):\n",
    "    for j in range(0,m):\n",
    "        color = (sorted_eigen_vectors[j,i] + 1.0)/(2.0)\n",
    "        plt_center.add_patch(Rectangle((i-0.5,j-0.5), 1, 1,color = plt.cm.RdGy_r(color),fill=True))\n",
    "\n",
    "plt_center.set_xlim([-0.5,3.5]); plt_center.set_ylim([-0.5,3.5])\n",
    "plt_center.set_xticks([0,1, 2, 3],[1,2,3,4]); plt_center.set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "for x in np.arange(0.5,3.5,1.0):\n",
    "    plt_center.plot([x,x],[-0.5,3.5],c='black',lw=3)\n",
    "    plt_center.plot([-0.5,3.5],[x,x],c='black',lw=1,ls='--')\n",
    "plt_center.set_title('Eigen Vectors / Principal Component Loadings')  \n",
    "\n",
    "plt_x.barh(y=np.array([0,1,2,3],dtype='float'),width=np.var(data,axis=0),color='darkorange',edgecolor='black')\n",
    "plt_x.set_xlim([1.2,0]); plt_x.set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "plt_x.set_ylabel('Feature'); plt_x.set_xlabel('Variance')\n",
    "plt_x.set_title('Original Feature Variance') \n",
    "\n",
    "plt_y.bar(x=np.array([0,1,2,3],dtype='float'),height=sorted_eigen_values,color='darkorange',edgecolor='black')\n",
    "plt_y.set_ylim([2.5,0]); plt_y.set_xticks([0,1, 2, 3],[1,2,3,4])\n",
    "plt_y.set_xlabel('Feature'); plt_y.set_ylabel('Variance')\n",
    "plt_y.set_title('Projected Feature Variance')  \n",
    "\n",
    "for i in range(0,m):\n",
    "    for j in range(0,m):\n",
    "        color = (cov_actual[j,i] + 1.0)/(2.0)\n",
    "        plt_extra.add_patch(Rectangle((i-0.5,j-0.5), 1, 1,color = plt.cm.bwr(color),fill=True))\n",
    "\n",
    "plt_extra.set_xlim([-0.5,3.5]); plt_extra.set_ylim([-0.5,3.5])\n",
    "plt_extra.set_xticks([0,1, 2, 3],[1,2,3,4]); plt_extra.set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "for x in np.arange(0.5,3.5,1.0):\n",
    "    plt_extra.plot([x,x],[-0.5,3.5],c='black',lw=3)\n",
    "    plt_extra.plot([-0.5,3.5],[x,x],c='black',lw=1,ls='--')\n",
    "plt_extra.set_title('Eigen Vectors / Principal Component Loadings')  \n",
    "\n",
    "plt.subplots_adjust(left=0.0, bottom=0.0, right=1.51, top=1.50, wspace=0.2, hspace=0.2); plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36698d45",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "\n",
    "m = 4; cstr = 0.0\n",
    "\n",
    "mean = np.zeros((m))                         # make inputs for multivariate dataset\n",
    "#cov = np.zeros((m,m))\n",
    "cov = np.full((m,m),0.8*cstr)\n",
    "for i in range(0,m):\n",
    "    cov[i,i] = 1.0\n",
    "cov[2,3] = cov[3,2] = 0.2*cstr; cov[1,3] = cov[3,1] = -0.2*cstr; cov[2,0] = cov[0,2] = 0.4*cstr; \n",
    "cov[1,0] = cov[0,1] = -0.5*cstr\n",
    "\n",
    "data = np.random.multivariate_normal(mean = mean, cov = cov, size = 1000) # draw samples from MV Gaussian\n",
    "data = StandardScaler(copy=True, with_mean=True, with_std=True).fit(data).transform(data)\n",
    "\n",
    "cov_actual = np.cov(data,rowvar = False)\n",
    "\n",
    "eigen_values,eigen_vectors = eig(cov_actual) # Eigen values and vectors \n",
    "sorted_indices = np.argsort(-eigen_values)\n",
    "sorted_eigen_vectors = eigen_vectors[:, sorted_indices]\n",
    "sorted_eigen_values = np.sort(-eigen_values)*-1\n",
    "\n",
    "fig = plt.figure(figsize=(6, 6))\n",
    "gs = fig.add_gridspec(2,2 ,width_ratios=(1.0, 1.0))\n",
    "\n",
    "plt_center = fig.add_subplot(gs[1, 1])\n",
    "plt_x = fig.add_subplot(gs[1, 0],sharey=plt_center) \n",
    "plt_y = fig.add_subplot(gs[0, 1],sharex=plt_center) \n",
    "plt_extra = fig.add_subplot(gs[0, 0]) \n",
    "\n",
    "for i in range(0,m):\n",
    "    for j in range(0,m):\n",
    "        color = (sorted_eigen_vectors[j,i] + 1.0)/(2.0)\n",
    "        plt_center.add_patch(Rectangle((i-0.5,j-0.5), 1, 1,color = plt.cm.RdGy_r(color),fill=True))\n",
    "        plt_center.annotate(np.round(sorted_eigen_vectors[j,i],1),(i-0.1,j-0.05))\n",
    "\n",
    "plt_center.set_xlim([-0.5,3.5]); plt_center.set_ylim([-0.5,3.5])\n",
    "plt_center.set_xticks([0,1, 2, 3],[1,2,3,4]); plt_center.set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "for x in np.arange(0.5,3.5,1.0):\n",
    "    plt_center.plot([x,x],[-0.5,3.5],c='black',lw=3)\n",
    "    plt_center.plot([-0.5,3.5],[x,x],c='black',lw=1,ls='--')\n",
    "plt_center.set_title('Eigen Vectors / Principal Component Loadings')  \n",
    "plt_center.set_xlabel('Eigen Vector'); plt_center.set_ylabel('Feature')\n",
    "\n",
    "plt_x.barh(y=np.array([0,1,2,3],dtype='float'),width=np.var(data,axis=0),color='darkorange',edgecolor='black')\n",
    "plt_x.set_xlim([1.2,0]); plt_x.set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "plt_x.set_ylabel('Feature'); plt_x.set_xlabel('Variance')\n",
    "plt_x.set_title('Original Feature Variance') \n",
    "\n",
    "plt_y.bar(x=np.array([0,1,2,3],dtype='float'),height=sorted_eigen_values,color='darkorange',edgecolor='black')\n",
    "plt_y.set_ylim([0,2.5]); plt_y.set_xticks([0,1, 2, 3],[1,2,3,4])\n",
    "plt_y.set_xlabel('Eigen Value'); plt_y.set_ylabel('Variance')\n",
    "plt_y.set_title('Sorted, Projected Feature Variance')  \n",
    "\n",
    "for i in range(0,m):\n",
    "    for j in range(0,m):\n",
    "        color = (cov_actual[j,i] + 1.0)/(2.0)\n",
    "        plt_extra.add_patch(Rectangle((i-0.5,j-0.5), 1, 1,color = plt.cm.BrBG(color),fill=True))\n",
    "\n",
    "plt_extra.set_xlim([-0.5,3.5]); plt_extra.set_ylim([3.5,-0.5])\n",
    "plt_extra.set_xticks([0,1, 2, 3],[1,2,3,4]); plt_extra.set_yticks([0,1, 2, 3],[1,2,3,4])\n",
    "for x in np.arange(0.5,3.5,1.0):\n",
    "    plt_extra.plot([x,x],[-0.5,3.5],c='black',lw=2)\n",
    "    plt_extra.plot([-0.5,3.5],[x,x],c='black',lw=2)\n",
    "plt_extra.set_title('Original Covariance Matrix')  \n",
    " \n",
    "cplt_extra = make_axes_locatable(plt_extra).append_axes('left', size='5%', pad=0.3)\n",
    "fig.colorbar(mpl.cm.ScalarMappable(norm=mpl.colors.Normalize(vmin=-1.0, vmax=1.0), cmap=plt.cm.BrBG),\n",
    "             cax=cplt_extra, orientation='vertical')\n",
    "cplt_extra.yaxis.set_ticks_position('left')\n",
    "\n",
    "plt.subplots_adjust(left=0.0, bottom=0.0, right=1.51, top=1.50, wspace=0.2, hspace=0.2); plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26c81bfe",
   "metadata": {},
   "outputs": [],
   "source": [
    "cov"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfd37f62",
   "metadata": {},
   "outputs": [],
   "source": [
    "eigen_vectors_sorted[3][3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ffc30bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted_eigen_vectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed00b8a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_components = 4\n",
    "pca = PCA(n_components=n_components,)\n",
    "pca.fit(data)\n",
    "print(np.round(pca.components_,3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6c58c46",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca.explained_variance_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39565068",
   "metadata": {},
   "outputs": [],
   "source": [
    "eigen_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4fdb342b",
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted_indices = np.argsort(-eigen_values)\n",
    "sorted_eigen_vectors = eigen_vectors[:, sorted_indices]\n",
    "sorted_eigen_vectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4bd12717",
   "metadata": {},
   "outputs": [],
   "source": [
    "eigen_vectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df84d9b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted_indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b72f080",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
